#Convert Dataframe to corpus

# Use tm package convert dataframe to corpus: https://www.rdocumentation.org/packages/tm/versions/0.7-8/topics/DataframeSource
doc_id = c(1:4619)
text_df <- data.frame(doc_id, text = Tweets_state$text, stringsAsFactors = FALSE)

# Convert example_text to a corpus: Success_corpus
tweets_corpus <- VCorpus(DataframeSource(text_df))

corpus <- tm_map(tweets_corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removeWords, c(stopwords("en")))
corpus <- tm_map(corpus, content_transformer(function(x) gsub("[[:cntrl:]]", "", x))) #remove control characters 
corpus <- tm_map(corpus, content_transformer(function(x) gsub("http\\S+", "", x))) #remove website addresses
corpus <- tm_map(corpus, content_transformer(function(x) gsub("@[A-Za-z0-9]+", "", x))) #remove mentions in text
corpus <- tm_map(corpus, stripWhitespace)
corpus <- tm_map(corpus, removePunctuation)

#Document Term Matrix of tweets
tweets_dtm <- DocumentTermMatrix(corpus)

#Convert DTM to tidy dataframe

tweets_tidy <- tidy(tweets_dtm) %>% 
  mutate(index = as.numeric(document)) %>% 
  left_join(Tweets_state, by=c("index"="id"))
# Get Bing lexicon
bing <- get_sentiments("bing")

# Join text to lexicon
Tweets_bing <- inner_join(tweets_tidy, bing, by = c("term" = "word")) %>%
   # Count by sentiment, index, document
  count(sentiment,index,document, text) %>%
   # Spread sentiments
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive-negative) 
# Join sentiment score with original twitter data fame
Tweets_sentiment <- Tweets_state %>% 
  inner_join(Tweets_bing, by = c("id" = "index")) %>% 
  select(-c(text.y,document, size=favorite_count)) 

Pos_Neg <- ggplot(Tweets_sentiment, aes(x=positive,y=negative)) +
  geom_jitter(alpha=0.8, color="rosybrown3") +
  theme_minimal()
png("Pos_Neg.png") #Saving plot to png
print(Pos_Neg)
dev.off()
## quartz_off_screen 
##                 2

#Let’s make a choropleth map based on sentiment score by state

# count by state
tw_state <- Tweets_sentiment %>%
  mutate(state = ifelse(state=="new york:long island","new york",
                        ifelse(state=="new york:main","new york",
                        ifelse(state=="new york:main","new york",
                        ifelse(state=="new york:manhattan","new york",
                        ifelse(state=="north carolina:main","north carolina",
                        ifelse(state=="massachusetts:main","massachusetts",
                        ifelse(state=="michigan:north","michigan",
                        ifelse(state=="michigan:south","michigan",
                        ifelse(state=="virginia:main", "virginia",
                        ifelse(state=="washington:main","washington",state))))))))))) %>% 
  group_by(state) %>% 
  mutate(N=n(), T.Sentiment=sum(sentiment), WordCounts = sum(display_text_width)) %>%   #get total number of tweets & sum of sentiment score by state
  select(display_text_width, lon, lat, state, T.Sentiment, N, WordCounts) %>%  #lon & lat are vary within each state.Keep first one. 
  group_by(state, lon, lat) %>% 
  summarise(Avg.Sentiment=T.Sentiment/N, WordCounts=WordCounts) %>% #calculate average sentiment score by state and keep total word counts by state
  ungroup(lon,lat) %>% 
  filter(row_number()==1) %>% 
  mutate(lon=ifelse(state=="new york",-73.935242, lon)) %>%  #change coordinates for New York State
  mutate(lat=ifelse(state=="new york",40.730610,lat)) %>% 
  filter(!is.na(state))
## `summarise()` has grouped output by 'state', 'lon', 'lat'. You can override using the `.groups` argument.
# Polygon stuff from shape file
#install.packages("tigris")
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
states <- states(cb=T) %>%
  mutate(name = tolower(NAME))
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |==============================                                        |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |======================================================================| 100%
# Use the Tigris function geo_join to bring together the states shapefile and the tw_states dataframe
states_shape_tweets <- geo_join(states, tw_state, "name", "state")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Creating a color palette based on the number range in the total column
pal <- colorNumeric("YlOrRd", domain=states_shape_tweets$Avg.Sentiment)

# Setting up the pop up text
popup_sb <- paste0("State: ", as.character(states_shape_tweets$NAME),"<br/>",
                  "Average Sentiment Score: ", as.character(states_shape_tweets$Avg.Sentiment),"<br/>",
                 "Total Word Count: ",as.character(states_shape_tweets$WordCounts),"<br/>")

# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  setView(-98.483330, 38.712046, zoom = 4) %>%
  addPolygons(data = states_shape_tweets ,
              fillColor = ~pal(states_shape_tweets$Avg.Sentiment),
              fillOpacity = 0.7,
              weight = 1.5,
              opacity = 1,
              color = "cornsilk",
              dashArray = "3",
              smoothFactor = 0.2,
              popup = ~popup_sb,
              highlight = highlightOptions(weight = 4,
                                           color = "#839EA8",
                                           dashArray = "",
                                           fillOpacity = 0.7,
                                           bringToFront = TRUE),
              popupOptions = popupOptions(style = list("font-weight" = "normal",
                                                       padding = "3px 8px",
                                                       "box-shadow" = "3px 3px rgba(0,0,0,0.25)"),
                                          textsize = "15px",
                                          direction = "auto")) %>%
  addLegend(pal = pal,
            values = states_shape_tweets$Avg.Sentiment,
            position = "bottomleft",
            title = "Average Sentiment Score",
            opacity = 0.7)
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
## Warning in validateCoords(lng, lat, funcName): Data contains 7 rows with either
## missing or invalid lat/lon values and will be ignored